Submitted by:
ID1: 320983216 Name1: Maxim Kolchinsky
ID2: 321340580 Name2: Anna Romanov

The Biology

The data for this lesson comes from:
> Saigi et al. “MET-Oncogenic and JAK2-Inactivating Alterations Are Independent Factors That Affect Regulation of PD-L1 Expression in Lung Cancer” PLoS ONE. 2018 Jun 13;9(6):e99625. PMID: 24926665.

Purpose: The blockade of immune checkpoints such as PDL1 and PD-1 is being exploited therapeutically in several types of malignancies. Here, we aimed to understand the contribution of the genetics of lung cancer to the ability of tumor cells to escape immunosurveillance checkpoints. Experimental Design: More than 150 primary non-small cell lung cancers, including pulmonary sarcomatoid carcinomas, were tested for levels of the HLA-I complex, PD-L1, tumor-infiltrating CD8þ lymphocytes, and alterations in main lung cancer genes. Correlations were validated in cancer cell lines using appropriate treatments to activate or inhibit selected pathways. We also performed RNA sequencing to assess changes in gene expression after these treatments. Results: MET-oncogenic activation tended to associate with positive PD-L1 immunostaining, whereas STK11 mutations were correlated with negative immunostaining. In MET-altered cancer cells, MET triggered a transcriptional increase of PD-L1 that was independent of the IFNgmediated JAK/STAT pathway. The activation of MET also upregulated other immunosuppressive genes (PDCD1LG2 and SOCS1) and transcripts involved in angiogenesis (VEGFA and NRP1) and in cell proliferation. We also report recurrent inactivating mutations in JAK2 that co-occur with alterations in MET and STK11, which prevented the induction of immunoresponse-related genes following treatment with IFNg. Conclusions: We show that MET activation promotes the expression of several negative checkpoint regulators of the immunoresponse, including PD-L1. In addition, we report inactivation of JAK2 in lung cancer cells that prevented the response to IFNg. These alterations are likely to facilitate tumor growth by enabling immune tolerance and may affect the response to immune checkpoint inhibitors

Data

This data was downloaded from GEO (GSE:GSE109720)

Import count data and metadata

library(readr)
library(dplyr)
library(ggplot2)

rawcounts <- read_csv("data/lung_counts.csv")
metadata <-  read_csv("data/lung_metadata.csv")

rawcounts
metadata

Clustering - k-means

library(tibble)
library(tidyr)
# a plotting function
plot_heatmap <- function(km) {
  centers <- km$centers %>%
      tbl_df() %>%
      rownames_to_column('Cluster') %>%
      gather(Sample, value, -Cluster) %>%
      mutate(
          Cluster = factor(Cluster),
          Sample = factor(Sample)
      )
   ggplot(centers,aes(Sample,Cluster)) + geom_tile(aes(fill=value)) + geom_text(aes(label = round(value, 1)), angle=90, size=4) + theme(axis.text.x=element_text(angle=90, vjust=0.5, hjust=0, size=12))
}

# cluster the genes

rawcounts_RN<- data.frame(rawcounts, row.names=1)
km <- kmeans(rawcounts_RN,5)
km$centers
##        AE1148      AE1149      AE1150     AE1151      AE1152      AE1153
## 1   6589.0833   8841.2332   6127.4839   7358.684   7346.2171   8890.0477
## 2  31052.4260  42212.6686  27571.1893  34696.260  32887.3373  41325.6095
## 3 101059.4762 147734.7143  89713.5238 117618.429 120754.0000 151602.5238
## 4 261075.5000 355397.5000 249188.5000 298081.500 312029.0000 330232.5000
## 5    204.3794    260.8659    184.8346    225.175    226.5053    263.4797
##        AE1154      AE1155      AE1156      AE1157      AE1158     AE1159
## 1   5868.3233   7272.5940   6356.2941   8001.1913   6352.8116   7040.299
## 2  26505.6568  33457.2663  29596.4379  37330.6331  30042.5799  32418.935
## 3  90998.6190 124462.2857 104368.4286 134149.9048 106395.0952 117189.476
## 4 222399.5000 302143.5000 258949.5000 295158.0000 257717.0000 284918.500
## 5    177.6818    222.9686    196.5011    232.7849    189.8281    212.521
##       AE1160      AE1161      AE1162      AE1163      AE1164      AE1165
## 1   7666.075   5913.3574   7290.1723   6847.2006   6203.9289   6185.2936
## 2  35868.917  27201.2367  34315.4142  31814.4260  29037.9941  29120.4734
## 3 143324.762 104374.0952 132841.0000 122184.8571 107568.4286 112637.7143
## 4 413308.500 281465.5000 419312.0000 335008.5000 264320.5000 344465.0000
## 5    242.584    180.0566    253.0504    219.7236    191.4948    214.1561
##        AE1166      AE1167      AE1168      AE1169      AE1170      AE1171
## 1   7557.6928   6488.0501   6934.8676   7469.2434   5595.4722   6493.8734
## 2  34757.7219  29575.0533  32625.8402  36791.6982  27468.7456  31881.7219
## 3 135000.0476 111633.0476 126291.2857 169666.4762 124785.2381 141406.8095
## 4 384117.5000 304171.0000 421639.5000 605629.0000 430845.0000 507135.5000
## 5    241.4646    198.3505    238.9257    253.7209    189.6234    218.4082
##        AE1172      AE1173      AE1174     AE1175      AE1176      AE1177
## 1   5550.7936   6760.0141   6311.3476   6946.255   5771.8062   6654.9367
## 2  27203.1006  33370.0118  31380.9112  29166.260  24118.6923  28170.8698
## 3 117254.7619 146959.6667 137927.1429 127871.333 104154.3333 123308.0952
## 4 422003.5000 530275.0000 492796.5000 382136.500 308577.5000 364062.5000
## 5    185.7504    226.5967    210.5955    222.858    183.9239    211.8739
##        AE1178      AE1179      AE1180
## 1   5181.0136   5586.3374   7055.9640
## 2  21687.6036  23821.4024  29880.8935
## 3  94938.3333 104582.6190 133835.4286
## 4 280669.0000 308508.0000 400801.0000
## 5    165.6252    178.0214    225.1644
num_genes <- table(km$cluster)

num_genes
## 
##     1     2     3     4     5 
##  2054   169    21     2 56101
plot(num_genes, type="l", main='num of genes in each cluster')

plot_heatmap(km)

# remove rows of zeroes
rawcounts_RN$sum <- rowSums(rawcounts_RN)
rawcounts_clean <- rawcounts_RN[ rawcounts_RN$sum >= 10, ]
rawcounts_clean
rawcounts_clean$sum <- NULL

# apply log scaling
rawcounts_RN
rawcounts_log = log10(1+rawcounts_clean)
km <- kmeans(rawcounts_log,5)
km$centers
##      AE1148    AE1149    AE1150    AE1151    AE1152    AE1153    AE1154
## 1 0.2224750 0.2595178 0.2365299 0.2465905 0.2664494 0.2856338 0.2270980
## 2 0.9926849 1.0658002 0.9937652 1.0406866 1.0859241 1.1021633 0.9782394
## 3 2.0053516 2.0718609 1.9683162 2.0457680 2.0753388 2.0995963 1.9408546
## 4 2.8768341 2.9629047 2.8159122 2.9162445 2.9151470 2.9548990 2.7748572
## 5 3.5738212 3.6867595 3.5233650 3.6183988 3.6159370 3.6748730 3.4829931
##      AE1155    AE1156    AE1157    AE1158    AE1159    AE1160    AE1161
## 1 0.2611225 0.2488747 0.2554619 0.2473489 0.2562278 0.2735666 0.2295744
## 2 1.0667283 1.0603057 1.0567061 1.0293423 1.0579908 1.0154241 0.8954655
## 3 2.0608478 2.0370740 2.0484974 1.9979362 2.0467039 1.8938259 1.7459993
## 4 2.9035514 2.8555548 2.9088671 2.8171388 2.8838187 2.9247148 2.7762690
## 5 3.6039529 3.5517147 3.6395846 3.5207057 3.5946933 3.6473609 3.5152520
##      AE1162    AE1163    AE1164    AE1165    AE1166    AE1167    AE1168
## 1 0.3069437 0.2660096 0.2308454 0.2634969 0.2683428 0.2351569 0.2865346
## 2 1.0746557 0.9825569 0.9117918 0.9844687 1.0037558 0.9144569 1.0278350
## 3 1.9523912 1.8613406 1.7745325 1.8706251 1.8907524 1.7858088 1.9209397
## 4 2.9481320 2.8845318 2.8080246 2.8748433 2.9221639 2.8221034 2.9211485
## 5 3.6335486 3.5997041 3.5385613 3.5622239 3.6428058 3.5588146 3.6114502
##      AE1169   AE1170    AE1171    AE1172   AE1173    AE1174    AE1175
## 1 0.4410393 0.373576 0.4068394 0.3665689 0.409207 0.3943318 0.2684219
## 2 1.1728400 1.052466 1.1166752 1.0400485 1.123027 1.1024374 0.9364739
## 3 2.0208283 1.892123 1.9587703 1.8819145 1.968288 1.9379473 1.8364030
## 4 2.9333166 2.806446 2.8682297 2.7963220 2.882997 2.8517416 2.8476059
## 5 3.6191436 3.491511 3.5584802 3.4891624 3.575307 3.5434462 3.5860752
##      AE1176    AE1177    AE1178    AE1179    AE1180
## 1 0.2289570 0.2646176 0.2237919 0.2291777 0.2697421
## 2 0.8611594 0.9207595 0.8351714 0.8435144 0.9420726
## 3 1.7454409 1.8129884 1.7102739 1.7326465 1.8332838
## 4 2.7616280 2.8245047 2.7176708 2.7502298 2.8514261
## 5 3.5038501 3.5662006 3.4583779 3.4908875 3.5926027
num_genes <- table(km$cluster)

num_genes
## 
##    1    2    3    4    5 
## 9311 4629 3569 5789 4401
plot(num_genes, type="l", main='num of genes in each cluster')

plot_heatmap(km)

# Apply standardization
rawcounts_scaled <- scale(rawcounts_log)
km <- kmeans(rawcounts_scaled,5)
km$centers
##       AE1148     AE1149     AE1150     AE1151     AE1152     AE1153
## 1  0.2528079  0.2451968  0.2458377  0.2538863  0.2649167  0.2567951
## 2  0.8964867  0.8893151  0.8828349  0.8935212  0.8863283  0.8799729
## 3  1.4114811  1.4119526  1.4156985  1.4092971  1.4048429  1.4039390
## 4 -1.0693269 -1.0688768 -1.0633402 -1.0722110 -1.0782449 -1.0693484
## 5 -0.5008578 -0.4873795 -0.4943596 -0.4901676 -0.4734303 -0.4761762
##       AE1154     AE1155     AE1156     AE1157     AE1158     AE1159
## 1  0.2447623  0.2625378  0.2683054  0.2498533  0.2583316  0.2608463
## 2  0.8760129  0.8860388  0.8821716  0.8804559  0.8744648  0.8826296
## 3  1.4127032  1.4041744  1.4048258  1.4157064  1.4051733  1.4108421
## 4 -1.0586070 -1.0739779 -1.0785432 -1.0690393 -1.0662110 -1.0741021
## 5 -0.4916615 -0.4791412 -0.4702332 -0.4830918 -0.4779076 -0.4795858
##       AE1160     AE1161     AE1162     AE1163     AE1164     AE1165
## 1  0.1468150  0.1198996  0.1673943  0.1452124  0.1274776  0.1580055
## 2  0.8973837  0.8894981  0.8979515  0.8978315  0.8936499  0.9007449
## 3  1.4240540  1.4423877  1.4017358  1.4245303  1.4359796  1.4101721
## 4 -1.0365985 -1.0180156 -1.0427133 -1.0321912 -1.0220277 -1.0340571
## 5 -0.4976051 -0.5213826 -0.4809970 -0.5061895 -0.5183713 -0.5024793
##       AE1166     AE1167     AE1168     AE1169     AE1170     AE1171
## 1  0.1481989  0.1284032  0.1652151  0.1754403  0.1611897  0.1696153
## 2  0.8979212  0.8933030  0.8994738  0.8715029  0.8722969  0.8709739
## 3  1.4222051  1.4381401  1.4073590  1.3956382  1.4062621  1.4039338
## 4 -1.0346146 -1.0215992 -1.0380963 -1.0333268 -1.0232746 -1.0306172
## 5 -0.5015744 -0.5215339 -0.4957272 -0.4671921 -0.4872764 -0.4752458
##       AE1172     AE1173     AE1174     AE1175     AE1176     AE1177
## 1  0.1597162  0.1691050  0.1655199  0.1384030  0.1238105  0.1326525
## 2  0.8720849  0.8718143  0.8711739  0.8850181  0.8846325  0.8835753
## 3  1.4123471  1.4040635  1.4059962  1.4240011  1.4340972  1.4275570
## 4 -1.0237152 -1.0311316 -1.0302534 -1.0126447 -1.0052797 -1.0095451
## 5 -0.4907044 -0.4749960 -0.4750045 -0.5235211 -0.5360008 -0.5268240
##       AE1178     AE1179     AE1180
## 1  0.1194956  0.1214050  0.1338500
## 2  0.8831780  0.8858309  0.8852047
## 3  1.4386298  1.4360721  1.4257434
## 4 -1.0017273 -1.0019488 -1.0131401
## 5 -0.5422155 -0.5441582 -0.5208928
num_genes <- table(km$cluster)

num_genes
## 
##    1    2    3    4    5 
## 3580 5810 4362 9297 4650
plot(num_genes, type="l", main='num of genes in each cluster')

plot_heatmap(km)

# load DE gene list
DE_res <- read_csv("data/sigresults.csv")
DE_genes <- DE_res$row

# example of filtering a data frame
# good_raw_names <- row.names(rawcounts_RN[runif(5,0,nrow(rawcounts_RN)),])
# good_raw_names
dat_filtered = rawcounts_log[DE_genes,]
rawcounts_scaled <- scale(dat_filtered)
km <- kmeans(rawcounts_scaled,5)
km$centers
##       AE1148     AE1149     AE1150     AE1151     AE1152     AE1153
## 1 -0.5679043 -0.5710244 -0.5646092 -0.5635655 -0.5572134 -0.5538821
## 2  0.1305102  0.1190975  0.1131888  0.1278225  0.1246943  0.1182771
## 3  0.6921359  0.6883703  0.6834653  0.6900177  0.6837953  0.6769214
## 4 -1.4186601 -1.4042857 -1.4001178 -1.4174385 -1.4131434 -1.3985486
## 5  1.2642194  1.2681902  1.2715872  1.2648753  1.2672687  1.2631622
##       AE1154     AE1155     AE1156     AE1157     AE1158     AE1159
## 1 -0.5600944 -0.5577884 -0.5500426 -0.5612359 -0.5515806 -0.5594106
## 2  0.1109551  0.1260730  0.1230321  0.1107905  0.1136236  0.1195345
## 3  0.6738198  0.6808053  0.6788820  0.6817834  0.6711872  0.6821012
## 4 -1.3866043 -1.4083430 -1.4123522 -1.4019121 -1.3912170 -1.4097971
## 5  1.2679550  1.2649363  1.2695092  1.2766971  1.2667223  1.2747334
##       AE1160      AE1161     AE1162     AE1163      AE1164     AE1165
## 1 -0.6829497 -0.69230036 -0.6637865 -0.6804427 -0.68754767 -0.6694058
## 2  0.1080044  0.08733502  0.1234108  0.1078759  0.09057776  0.1204023
## 3  0.7025155  0.70183119  0.6956437  0.7017723  0.70309774  0.6989594
## 4 -1.3155995 -1.29956885 -1.3176688 -1.3158176 -1.30384606 -1.3179860
## 5  1.2526127  1.26701374  1.2272945  1.2516227  1.26120947  1.2316046
##       AE1166      AE1167     AE1168      AE1169      AE1170      AE1171
## 1 -0.6764159 -0.68630044 -0.6615795 -0.72306084 -0.73085504 -0.73275212
## 2  0.1069523  0.09250882  0.1207823  0.06216534  0.06053769  0.05927586
## 3  0.7034191  0.70400625  0.6985243  0.68600817  0.68696520  0.68710566
## 4 -1.3177964 -1.30796821 -1.3229750 -1.23180963 -1.22748292 -1.23049206
## 5  1.2477220  1.26169383  1.2302591  1.26464953  1.26749226  1.27559219
##        AE1172      AE1173      AE1174      AE1175      AE1176      AE1177
## 1 -0.73003646 -0.73077177 -0.73333318 -0.69473459 -0.70456188 -0.69613378
## 2  0.05774518  0.05864332  0.05696841  0.08755498  0.08122156  0.08295579
## 3  0.68837087  0.68793246  0.68714281  0.69946044  0.70135830  0.69999229
## 4 -1.23359967 -1.23164832 -1.22851461 -1.27756735 -1.27180358 -1.27539189
## 5  1.27647625  1.27414198  1.27614209  1.24116187  1.24831702  1.24429253
##        AE1178      AE1179      AE1180
## 1 -0.70449348 -0.69822741 -0.69284635
## 2  0.07953479  0.08273517  0.08558701
## 3  0.70144498  0.70187376  0.70028015
## 4 -1.27263484 -1.27849094 -1.27944355
## 5  1.25145116  1.24796525  1.24261055
num_genes <- table(km$cluster)

num_genes
## 
##    1    2    3    4    5 
## 3055 3267 5158 3895 2582
plot(num_genes, type="l", main='num of genes in each cluster')

plot_heatmap(km)

# a plotting function
plot_heatmap <- function(km) {
  centers <- km$centers %>%
      tbl_df() %>%
      rownames_to_column('Cluster') %>%
      gather(Gene, value, -Cluster) %>%
      mutate(
          Cluster = factor(Cluster),
          Gene = factor(Gene)
      )
   ggplot(centers,aes(Cluster,Gene)) + geom_tile(aes(fill=value)) + theme(axis.text.x=element_text(angle=0, vjust=0.5, hjust=0, size=12)) + theme(axis.text.y=element_blank())
}

# cluster the samples
rawcounts_clean <- rawcounts_RN[ rawcounts_RN$sum >= 10, ]
rawcounts_log = log10(1+rawcounts_clean)
DE_res <- read_csv("data/sigresults.csv")
DE_genes <- DE_res$row
dat_filtered = rawcounts_log[DE_genes,]
km <- kmeans(t(dat_filtered),4)
#km$centers
table(km$cluster)
## 
##  1  2  3  4 
##  6 10 12  6
plot_heatmap(km)

Record sessionInfo()

The sessionInfo() prints version information about R and any attached packages. It’s a good practice to always run this command at the end of your R session and record it for the sake of reproducibility in the future.

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17134)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] bindrcpp_0.2.2 tidyr_0.8.2    tibble_1.4.2   dplyr_0.7.8   
## [5] readr_1.3.0    ggplot2_3.1.0  knitr_1.21    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0       pillar_1.3.1     compiler_3.5.1   plyr_1.8.4      
##  [5] bindr_0.1.1      tools_3.5.1      digest_0.6.18    evaluate_0.12   
##  [9] gtable_0.2.0     pkgconfig_2.0.2  rlang_0.3.0.1    yaml_2.2.0      
## [13] xfun_0.4         withr_2.1.2      stringr_1.3.1    hms_0.4.2       
## [17] grid_3.5.1       tidyselect_0.2.5 glue_1.3.0       R6_2.3.0        
## [21] rmarkdown_1.11   purrr_0.2.5      magrittr_1.5     codetools_0.2-15
## [25] scales_1.0.0     htmltools_0.3.6  assertthat_0.2.0 colorspace_1.3-2
## [29] labeling_0.3     stringi_1.2.4    lazyeval_0.2.1   munsell_0.5.0   
## [33] crayon_1.3.4